Random KFG Bin Sample:
    —  Pendant Pendant Sum Monte Carlo Simulation


1. Creating the distributions

1.1 The Existing FieldGuide Distribution

The first dataframe to build is a database of sums using the khipus in the existing KFG.

Code
import math
import random
from random import choices

import numpy as np
import pandas as pd
import khipu_kamayuq as kamayuq  # A Khipu Maker is known (in Quechua) as a Khipu Kamayuq
import khipu_qollqa as kq
from pandas import Series, DataFrame

# Plotly
import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
plotly.offline.init_notebook_mode(connected = False)

from monte_carlo import DiscreteDistributionSampler, PendantSummer, StrawmanKhipu
Code
# Load every khipu, then wrap each one as a StrawmanKhipu (tagged "KFG") built
# from the knotted values of its pendant cords.
khipu_dict, all_khipus = kamayuq.fetch_khipus()
strawmen_kfg_khipu = [
    StrawmanKhipu(
        khipu.name(),
        "KFG",
        [cord.knotted_value() for cord in khipu.pendant_cords()],
    )
    for khipu in all_khipus
]

# One dataframe row per khipu; column names are supplied by StrawmanKhipu.
kfg_rows = [strawman.dataframe_tuple() for strawman in strawmen_kfg_khipu]
strawmen_kfg_df = pd.DataFrame(kfg_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_kfg_df.head()
name source num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
0 AS010 KFG 27 8 10.392305 3 2 5 3.0 1.000000 22.0 9.695360 0.227273 4.333333 0.577350 -6.5 4.949747
1 AS011 KFG 15 92 183.904867 0 0 0 0.0 0.000000 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.000000
2 AS012 KFG 85 2 5.196152 3 4 7 6.0 2.449490 18.0 7.348469 0.233333 10.333333 6.027714 -9.0 5.099020
3 AS013 KFG 90 4 14.456832 0 5 5 14.0 14.456832 48.0 44.508426 0.121951 0.000000 0.000000 -19.0 8.860023
4 AS014 KFG 42 53 40.137264 1 2 3 2.0 0.000000 99.0 7.000000 0.071429 17.000000 0.000000 -15.0 4.242641
Code
# Summarise the existing-KFG dataframe: total right/left sum counts, their
# percentage split, and the mean/stdev (across khipus) of the per-khipu mean
# handedness columns.
total_right_sums = int(strawmen_kfg_df["num_right_sums"].sum())
total_left_sums = int(strawmen_kfg_df["num_left_sums"].sum())
total_sums = total_right_sums + total_left_sums

# Fall back to 0% when there are no sums at all, to avoid dividing by zero.
left_pct = round(100.0 * total_left_sums / total_sums) if total_sums > 0 else 0
right_pct = round(100.0 * total_right_sums / total_sums) if total_sums > 0 else 0

left_handed_mean = round(strawmen_kfg_df["mean_left_handedness"].mean(), 1)
right_handed_mean = round(strawmen_kfg_df["mean_right_handedness"].mean(), 1)
left_handed_stdev = round(strawmen_kfg_df["mean_left_handedness"].std(), 1)
right_handed_stdev = round(strawmen_kfg_df["mean_right_handedness"].std(), 1)

# The counts are printed as plain "right/left"; the former `{total_left_sums=}`
# debug specifier leaked the variable name into the report line.
print(f"Existing KFG - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums})")
print(f"             - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_kfg_df.describe()
Existing KFG - Right/Left Distribution = 54%/46% (4354/total_left_sums=3734)
             - Right/Left Mean Handedness = 9.9/-8.5 ±(15.0/14.2)
num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
count 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000
mean 64.109231 309.835385 496.113591 6.698462 5.744615 12.443077 3.156923 2.032227 146.203077 70.992234 0.160933 9.914679 6.387789 -8.511022 5.777778
std 102.325678 1637.638528 2778.219238 13.630774 12.081363 25.319237 3.620689 3.925252 1052.163975 224.887696 0.179440 15.025712 12.766330 14.176137 11.853944
min 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -123.303371 0.000000
25% 14.000000 5.000000 7.071068 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -11.555556 0.000000
50% 32.000000 19.500000 26.267844 1.000000 1.000000 2.000000 2.000000 0.000000 25.000000 5.291503 0.105823 5.666667 0.000000 -3.333333 0.000000
75% 77.750000 78.750000 134.914450 6.000000 5.000000 11.750000 5.000000 2.828427 67.000000 41.225340 0.250000 13.925926 7.771955 0.000000 7.681053
max 1650.000000 26324.000000 42206.089632 105.000000 101.000000 200.000000 27.000000 37.094474 24072.000000 3210.316028 0.823009 150.525773 129.388714 0.000000 117.441998

Of interest, note that for the existing KFG the mean handedness is roughly 9 to 10 cords (9.9 to the right, -8.5 to the left), and it is asymmetric - that is, the middle of the summands, by index, is on average about 9 cords away from the sum cord. There are 0.16 sums per nonzero pendant.

1.2 Strawman Khipus based on a Random KFG Distribution

Next, we make a dataframe based on randomly generated khipus that has:

  1. The same number of khipus as the KFG
  2. A pendant cord count chosen randomly from the existing khipus’ pendant cord counts
  3. Pendant values that are randomly chosen from the existing KFG cord values.
Code
# Pendant-cord count of every loaded khipu; the pool that random khipu sizes
# are drawn from.
cords_per_khipu = [khipu.num_pendant_cords() for khipu in all_khipus]


def sample_kfg_num_cords():
    """Return a pendant-cord count drawn at random from `cords_per_khipu`.

    Draws of fewer than 3 cords (trivial khipus) are replaced by 3.
    """
    (drawn,) = choices(cords_per_khipu, k=1)
    return 3 if drawn < 3 else drawn

# Generate as many random strawman khipus as there are loaded khipus.
num_dummy_khipus = len(all_khipus)

# Pool of all strictly positive pendant-cord values; the sampler draws from it.
cord_values = [
    cord.knotted_value()
    for khipu in all_khipus
    for cord in khipu.pendant_cords()
    if cord.knotted_value() > 0
]
sampler = DiscreteDistributionSampler(cord_values)

# Each random khipu gets a sampled cord count and that many sampled values,
# rounded to whole numbers.
random_names = [f"rkfg_khipu_{i:05d}" for i in range(num_dummy_khipus)]
strawmen_rkfg_khipu = [
    StrawmanKhipu(
        khipu_name,
        "runif",
        [round(value) for value in sampler.bin_sample(sample_kfg_num_cords())],
    )
    for khipu_name in random_names
]

rkfg_rows = [strawman.dataframe_tuple() for strawman in strawmen_rkfg_khipu]
strawmen_rkfg_df = pd.DataFrame(rkfg_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_rkfg_df.head()
name source num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
0 rkfg_khipu_00000 runif 32 417.312500 1433.013145 0 4 4 4.0 4.472136 120.250000 183.301891 0.125000 0.0 0.000000 -12.25 9.742518
1 rkfg_khipu_00001 runif 19 54.578947 112.914085 2 0 2 5.0 2.828427 214.500000 262.336616 0.105263 6.5 2.121320 0.00 0.000000
2 rkfg_khipu_00002 runif 10 105.000000 215.724104 0 0 0 0.0 0.000000 0.000000 0.000000 0.000000 0.0 0.000000 0.00 0.000000
3 rkfg_khipu_00003 runif 27 214.074074 683.023703 3 0 3 6.0 6.403124 947.333333 1604.481640 0.111111 11.0 4.582576 0.00 0.000000
4 rkfg_khipu_00004 runif 27 84.185185 143.927876 0 2 2 3.0 2.000000 50.500000 50.204581 0.074074 0.0 0.000000 -15.00 7.071068
Code
# Summarise the random-KFG dataframe: total right/left sum counts, their
# percentage split, and the mean/stdev (across khipus) of the per-khipu mean
# handedness columns.
total_right_sums = int(strawmen_rkfg_df["num_right_sums"].sum())
total_left_sums = int(strawmen_rkfg_df["num_left_sums"].sum())
total_sums = total_right_sums + total_left_sums
print(f"{total_right_sums=} {total_left_sums=}")

# Fall back to 0% when there are no sums at all, to avoid dividing by zero.
left_pct = round(100.0 * total_left_sums / total_sums) if total_sums > 0 else 0
right_pct = round(100.0 * total_right_sums / total_sums) if total_sums > 0 else 0

left_handed_mean = round(strawmen_rkfg_df["mean_left_handedness"].mean(), 1)
right_handed_mean = round(strawmen_rkfg_df["mean_right_handedness"].mean(), 1)
left_handed_stdev = round(strawmen_rkfg_df["mean_left_handedness"].std(), 1)
right_handed_stdev = round(strawmen_rkfg_df["mean_right_handedness"].std(), 1)

# The counts are printed as plain "right/left"; the former `{total_left_sums=}`
# debug specifier leaked the variable name into the report line.
print(f"Random KFG - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums})")
print(f"           - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_rkfg_df.describe()
total_right_sums=10610 total_left_sums=10514
Random KFG - Right/Left Distribution = 50%/50% (10610/total_left_sums=10514)
           - Right/Left Mean Handedness = 12.5/-12.3 ±(13.0/13.0)
num_pendants mean_cord_value stdev_cord_value num_right_sums num_left_sums num_sums mean_num_summands stdev_num_summands mean_sum_value stdev_sum_value num_sums_per_nonzero_pendant mean_right_handedness stdev_right_handedness mean_left_handedness stdev_left_handedness
count 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000 650.000000
mean 65.656923 337.083721 1250.850541 16.323077 16.175385 32.498462 2.735385 2.076469 92.076141 199.379435 0.237186 12.543546 9.248155 -12.319211 9.381162
std 121.786639 1671.626348 5118.629259 49.004263 48.696127 97.598218 1.918399 2.563036 132.220616 416.205046 0.212900 12.955093 12.735598 12.971127 12.907701
min 3.000000 4.666667 3.932768 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -75.119883 0.000000
25% 12.000000 75.766026 147.210025 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -20.099696 0.000000
50% 32.000000 157.701208 468.338420 3.000000 3.000000 6.000000 3.000000 1.414214 59.972973 50.325395 0.194444 10.000000 5.191793 -9.083333 4.405330
75% 80.000000 299.861423 1069.302015 15.000000 15.000000 29.750000 4.000000 3.162278 130.392157 220.179981 0.387399 20.237500 13.676254 0.000000 15.679902
max 1650.000000 40258.000000 113249.594284 702.000000 712.000000 1414.000000 8.000000 14.035669 1739.000000 4594.792355 0.856970 74.497151 96.889289 0.000000 100.346936

2. Random KFG vs. Existing KFG - Graphical Distribution

To graphically compare the distributions of the random khipus with existing khipus, one dataframe is needed:

Code
def source_color(x):
    """Map a source label to a colour-scale value: 0.0 for "KFG", 1.0 otherwise."""
    if x == "KFG":
        return 0.0
    return 1.0

# Stack the existing-KFG rows on top of the random-KFG rows, then add a numeric
# colour value per row derived from its source label.
combined_kfg_rkfg_df = pd.concat([strawmen_kfg_df, strawmen_rkfg_df])
combined_kfg_rkfg_df['source_color'] = list(
    map(source_color, combined_kfg_rkfg_df["source"].values)
)
Code
def _pct_of_total(part, total):
    """Return `part` as a whole-number percentage of `total`, or 0 when `total` is not positive."""
    return round(100.0 * float(part) / float(total)) if total > 0 else 0


# Right/left sum counts and percentage split for the existing KFG khipus.
kfg_left = sum(strawmen_kfg_df["num_left_sums"].tolist())
kfg_right = sum(strawmen_kfg_df["num_right_sums"].tolist())
pct_kfg_left = _pct_of_total(kfg_left, kfg_left + kfg_right)
pct_kfg_right = _pct_of_total(kfg_right, kfg_left + kfg_right)

# Same figures for the randomly generated khipus.
rkfg_left = sum(strawmen_rkfg_df["num_left_sums"].tolist())
rkfg_right = sum(strawmen_rkfg_df["num_right_sums"].tolist())
pct_rkfg_left = _pct_of_total(rkfg_left, rkfg_left + rkfg_right)
pct_rkfg_right = _pct_of_total(rkfg_right, rkfg_left + rkfg_right)

print(f"Num Right/Left Sums for Existing KFG:{kfg_right}/{kfg_left} ({pct_kfg_right}%/{pct_kfg_left}%)")
print(f"Num Right/Left Sums for Random KFG: {rkfg_right}/{rkfg_left} ({pct_rkfg_right}%/{pct_rkfg_left}%)")
Num Right/Left Sums for Existing KFG:4354/3734 (54%/46%)
Num Right/Left Sums for Random KFG: 10610/10514 (50%/50%)
Code
# Scatter of right- vs. left-handed sum counts per khipu on log-log axes.
# Marker size is the mean number of summands; colour comes from `source_color`.
legend_text = "<b>Random KFG vs Existing KFG - #Sums:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_rkfg_df, x="num_right_sums", y="num_left_sums", log_x=True, log_y=True,
    size="mean_num_summands",
    opacity=.4,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; chaining `.show()` into the assignment left
# `fig` as None because `show()` returns nothing.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
fig.show()

As expected, random sums that are small occur more often. They also have fewer summands. Let’s evaluate the number of summands for the random khipus vs. the existing khipus.

Code
# Scatter of mean summands per sum (x) against number of sums (y, log scale).
# Marker size is the number of sums per nonzero pendant; colour comes from
# `source_color`.
legend_text = "<b>Random KFG vs Existing KFG - #Sums vs #Summands:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Sums/Pendant</i>"
fig = px.scatter(
    combined_kfg_rkfg_df, x="mean_num_summands", y="num_sums", log_y=True,
    size="num_sums_per_nonzero_pendant",
    opacity=.4,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; chaining `.show()` into the assignment left
# `fig` as None because `show()` returns nothing.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
fig.show()

This echoes the previous statement about the number of summands being very different in the Random KFG set! A relatively clear separation occurs.

Code
# Scatter of mean left-handedness (x) against mean right-handedness (y) per
# khipu. Marker size is the mean number of summands; colour comes from
# `source_color`.
legend_text = "<b>Random KFG vs Existing KFG - Sum Handedness:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_rkfg_df, x="mean_left_handedness", y="mean_right_handedness",
    size="mean_num_summands",
    opacity=0.3,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; chaining `.show()` into the assignment left
# `fig` as None because `show()` returns nothing.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
fig.show()

Existing KFG khipus tend to have their sums close to their summands, for obvious reasons. However, the randomly generated khipus have many more far sums, with a small number of overall summands.

Code
# Scatter of sums per nonzero pendant (x) against mean sum value (y, log scale).
# Marker size is the number of pendants; colour comes from `source_color`.
legend_text = "<b>Random KFG vs Existing KFG - Mean Sum vs #Sums/Pendant:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Pendant</i>"
fig = px.scatter(
    combined_kfg_rkfg_df, x="num_sums_per_nonzero_pendant", y="mean_sum_value", log_y=True,
    size="num_pendants",
    opacity=0.5,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; chaining `.show()` into the assignment left
# `fig` as None because `show()` returns nothing.
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
fig.show()

3. Frequency Distributions

An examination of frequency distributions for key variables, using violin plots, where width=frequency and height=variable being measured.

Code
# Per-khipu handedness bias: |#right-handed sums| - |#left-handed sums|,
# computed on the raw arrays so no index alignment is involved.
right_counts = combined_kfg_rkfg_df['num_right_sums'].to_numpy()
left_counts = combined_kfg_rkfg_df['num_left_sums'].to_numpy()
combined_kfg_rkfg_df['handedness_bias'] = abs(right_counts) - abs(left_counts)

# Relabel every non-"KFG" source as "Random KFG" for the plot legend. This is
# safe to re-run: "KFG" stays "KFG" and "Random KFG" stays "Random KFG".
combined_kfg_rkfg_df['source'] = [
    "KFG" if label == 'KFG' else "Random KFG"
    for label in combined_kfg_rkfg_df['source'].values.tolist()
]

legend_text = "<b>Random KFG vs KFG - Handedness Bias (#RightHandedSums - #LeftHandedSums)</b>"
fig = px.violin(
    combined_kfg_rkfg_df, y="handedness_bias",
    points='all', color="source",
    hover_data=['name', 'num_sums'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; `show()` returns nothing, so chaining it into
# the assignment left `fig` as None.
fig.show()
Code
# Violin plot of the natural log of each khipu's mean cord value, split by source.
legend_text = "<b>Random KFG vs KFG - Log(Mean Cord Value)</b>"
# Non-positive means have no logarithm and are plotted at 0; the guard already
# ensures x > 0, so no abs() is needed.
combined_kfg_rkfg_df['log_mean_cord_value'] = [
    math.log(x) if x > 0 else 0.0
    for x in combined_kfg_rkfg_df['mean_cord_value'].values.tolist()
]
fig = px.violin(
    combined_kfg_rkfg_df, y="log_mean_cord_value",
    points='all', color="source",
    hover_data=['name', 'num_sums'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; `show()` returns nothing, so chaining it into
# the assignment left `fig` as None.
fig.show()
Code
# Violin plot of the natural log of each khipu's mean sum value, split by source.
legend_text = "Violin Plot <b>Random KFG vs Existing KFG -  Log(Sum Mean)</b>"
# Non-positive means have no logarithm and are plotted at 0; the guard already
# ensures x > 0, so no abs() is needed.
combined_kfg_rkfg_df['log_mean_sum'] = [
    math.log(x) if x > 0 else 0.0
    for x in combined_kfg_rkfg_df['mean_sum_value'].values.tolist()
]
fig = px.violin(
    combined_kfg_rkfg_df, y="log_mean_sum",
    labels={"log_mean_sum": "Log(Sum Mean)"},
    points='all', color="source",
    hover_data=['name', 'num_sums'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; `show()` returns nothing, so chaining it into
# the assignment left `fig` as None.
fig.show()
Code
# Violin plot of the number of sums per nonzero pendant, split by source.
legend_text = "Violin Plot <b>Random KFG vs Existing KFG - #Sums/Pendant</b>"
fig = px.violin(
    combined_kfg_rkfg_df, y="num_sums_per_nonzero_pendant",
    points='all', color="source",
    labels={"num_sums_per_nonzero_pendant": "#Sums/Pendant"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; `show()` returns nothing, so chaining it into
# the assignment left `fig` as None.
fig.show()
Code
# Violin plot of the mean number of summands per sum, split by source.
legend_text = "<b>Violin Plot - Random KFG vs Existing KFG - #Summands per Sum</b>"
fig = px.violin(
    combined_kfg_rkfg_df, y="mean_num_summands",
    points='all', color="source",
    labels={"mean_num_summands": "#Summands per Sum"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# Keep the figure bound to `fig`; `show()` returns nothing, so chaining it into
# the assignment left `fig` as None.
fig.show()

This is also as you would expect - khipus that are randomly generated tend to not have large sums and they don’t have as many summands per pendant cord sum.